
############################################################################
#####################	   read in data       ###########################
############################################################################

# Load the expression table of the 2nd reprogramming series and the first
# column of the naive DEG file (used further down as row names).
logfpkm2nd <- read.table(
	"../data/2nd.reprogramming.lg2.all.fpkm.txt",
	header = TRUE,
	row.names = 1
)
n_deg <- read.table("Gfold/cutoff.0.58/naive.2nd.deg")[, 1]

# Replicate columns of the naive path, listed in time-course order.
n_path <- c(
	"hiF_r1", "hiF_r2",
	"he0_r1", "he0_r2",
	"he2_r1", "he2_r2",
	"he6_r1", "he6_r2",
	"n8_r1", "n8_r2", "n8_r3",
	"n12_r1", "n12_r2",
	"n14_r1", "n14_r2", "n14_r3",
	"n20_r1", "n20_r2", "n20_r3",
	"n24p_r1", "n24p_r2",
	"n24m_r1", "n24m_r2",
	"niPS_r1", "niPS_r2"
)
nData_tmp <- logfpkm2nd[, n_path]
# 2^x - 1 inverts log2(x + 1); assumes the table stores log2(FPKM + 1) -- TODO confirm
nfpkm2nd <- 2^nData_tmp - 1

n_time_point <- c(
	"hiF", "he0", "he2", "he6", "n8", "n12",
	"n14", "n20", "n24pdox", "n24mdox", "niPS"
)
n_label <- c(
	"hiF-T", "0d", "2d", "6d", "8d", "12d",
	"14d", "20d", "24d+dox", "24d-dox", "niPSC-T"
)

# Positions (within n_path) of the replicate columns of each time point,
# in the same order as n_time_point.
replicate_cols <- list(
	1:2, 3:4, 5:6, 7:8, 9:11, 12:13,
	14:16, 17:19, 20:21, 22:23, 24:25
)
# One column of per-gene replicate means per time point.
nData2ndfpkm <- do.call(
	cbind,
	lapply(replicate_cols, function(cols) apply(nfpkm2nd[, cols], 1, mean))
)
colnames(nData2ndfpkm) <- n_time_point
rownames(nData2ndfpkm) <- rownames(nfpkm2nd)

# Back to log2(x + 1) scale, then keep only the DEG rows present in the table.
nData <- log2(nData2ndfpkm[, ] + 1)
n_deg <- intersect(n_deg, rownames(nData))
nData <- nData[n_deg, ]

# Load the per-sample expression table of the development data set.
data <- read.table("../data/nsmb.2660-S2.txt", header = TRUE, row.names = 1)

# Column indices of the samples that make up each stage / lineage group.
Oocyte <- 1:3
Zygote <- 4:6
cell2 <- 7:12
cell4 <- 13:24
cell8 <- 25:44
Morula <- 45:60
MTE <- c(64,66,67,69,72,76:79)
PTE <- c(61:63,65,68,70,71,81,82)
PE <- c(84:90)
EPI <- c(73:75,80,83)
hESC0 <- 91:98
hESC10 <- 99:124

# Column names for the averaged matrix and the matching display labels.
time_point <- c(
	"Oocyte", "Zygote", "X2cell", "X4cell", "X8cell", "Morula",
	"MTE", "PTE", "PE", "EPI", "hESC0", "hESC10"
)
dev_labels <- c(
	"Oocyte", "Zygote", "2cell", "4cell", "8cell", "Morula",
	"MTE", "PTE", "PE", "EPI", "hESC0", "hESC10"
)
development_path <- time_point

# Per-gene mean over the samples of each group, one column per group,
# in the same order as time_point.
stage_cols <- list(
	Oocyte, Zygote, cell2, cell4, cell8, Morula,
	MTE, PTE, PE, EPI, hESC0, hESC10
)
avg <- do.call(
	cbind,
	lapply(stage_cols, function(cols) apply(data[, cols], 1, mean))
)
colnames(avg) <- time_point
dData <- log2(avg + 1)
############################################################################
#####################      cluster by correlation      #####################
############################################################################
# amap supplies Kmeans(), used below with method = "correlation".
library(amap)
# Number of clusters and RNG seed for the clustering below.
k <- 14
set.seed(4)

# NOTE(review): the result of this stats::kmeans() call is overwritten by the
# Kmeans() call below without ever being read. It sits between set.seed(4) and
# Kmeans(), so it presumably advances the random number stream; removing it
# would then change the cluster numbering that `selected_cluster` hard-codes.
# Confirm before deleting or reordering.
km <- kmeans(nData,k)
# Rebuilds nData with the same three statements used in the data-loading
# section (log2 of mean FPKM + 1, restricted to the DEG rows).
nData <- log2(nData2ndfpkm[,]+1)
n_deg <- intersect(n_deg,rownames(nData))
nData <- nData[n_deg,]

# Clustering actually used downstream: k clusters via amap::Kmeans with the
# "correlation" method; km$cluster is read by the plotting sections.
km <- Kmeans(nData,k,method = "correlation")

# Cluster ids (values of km$cluster) to plot, in display order, and the colour
# of each one: clusterCol[i] is used for selected_cluster[i].
selected_cluster <- c(10,11,4,5,8,6,2)
cccol <- c("#CE0013","#FA8072","#32CD32","#7FFFD4","#3A5FCD","#004138","#00CED1","#190246","#EEEE00")
clusterCol <- cccol[c(3,4,1,2,5,6,7)]

# SFig2D_1: one page per selected cluster with the mean log2(FPKM+1) profile
# over the time points and a shaded 95% t-based confidence band of the mean.
pdf(file = "SFig2D_1.pdf", width = 5.55, height = 3)
n_points <- length(n_time_point)
alpha <- 0.05
for (each_i in seq_along(selected_cluster)) {
	each <- selected_cluster[each_i]
	modGenes <- names(which(km$cluster == each))
	n <- length(modGenes)
	# drop = FALSE keeps a matrix even when the cluster holds a single gene,
	# so the column-wise apply() calls below still work.
	modExpr <- nData[modGenes, , drop = FALSE]
	v1 <- apply(modExpr, 2, mean)
	plot(v1, lwd = 3, type = "l", col = clusterCol[each_i], xlab = NA, ylab = "Log2(FPKM+1)", xaxt = "n", main = paste0(n, " genes"))
	# One unlabelled tick per time point; the count follows n_time_point
	# instead of a hard-coded 11.
	axis(side = 1, at = seq_len(n_points), labels = rep("", n_points), las = 2)
	axis(side = 2)
	box()
	# The band needs at least two genes (sd and the t quantile are undefined
	# for n = 1).
	if (n > 1) {
		# Stored as col_sd so the stats::sd function is not masked by a
		# numeric vector of the same name on later iterations.
		col_sd <- apply(modExpr, 2, sd)
		half_width <- col_sd / sqrt(n) * qt(1 - alpha / 2, n - 1)
		v2 <- v1 - half_width
		v3 <- v1 + half_width
		# Upper bound left-to-right, then lower bound right-to-left.
		polygon(c(seq_len(n_points), rev(seq_len(n_points))), c(v3, rev(v2)), col = adjustcolor("grey", alpha.f = 0.4), border = NA)
	}
	# fileName = paste(k,"cluster_",each,"_gene.txt",sep="")
	# write.table(cbind(modGenes),file=fileName,row.names=F,col.names=F,quote=F,sep="\t")
}
dev.off()

# SFig2D_2: one page per selected cluster with a colour-scale strip (left
# panel) and a heatmap of the cluster's rows of nData (right panel). Values
# are clipped to the cluster's 1%-99% quantile range before plotting.
pdf(file = "SFig2D_2.pdf", width = 5.2, height = 2)
plot_data <- nData
# The palette is the same for every cluster, so it is built once.
ColorRamp <- colorRampPalette(c("lightblue","white","red"), bias = 1)(10000)
for (each_i in seq_along(selected_cluster)) {
	each <- selected_cluster[each_i]
	# drop = FALSE keeps a matrix even for a single-gene cluster.
	plotMatrix <- plot_data[names(which(km$cluster == each)), , drop = FALSE]
	all_exp <- c(as.matrix(plotMatrix))
	zmax <- quantile(all_exp, 0.99)
	zmin <- quantile(all_exp, 0.01)
	# Midpoint of the colour scale. The half-range (zmax - zmin) / 2 used
	# before only coincides with it when zmin is 0.
	zmid <- (zmin + zmax) / 2
	# Values represented by the 10000 palette entries.
	ColorLevels <- seq(from = zmin, to = zmax, length.out = 10000)
	plotMatrix[plotMatrix < zmin] <- zmin
	plotMatrix[plotMatrix > zmax] <- zmax
	par(oma = c(0.5,0.5,0.5,0.5), mar = c(8,2,2,2))
	# 1/9 of the page width for the colour strip, 8/9 for the heatmap.
	layout(matrix(c(1,2,2,2,2,2,2,2,2), ncol = 9, nrow = 1, byrow = TRUE))
	image(1, ColorLevels, t(matrix(data = ColorLevels, nrow = length(ColorLevels), ncol = 1)), col = ColorRamp, xlab = "", ylab = "", cex.axis = 2, xaxt = "n", yaxt = "n", useRaster = TRUE)
	axis(side = 2, at = c(zmin, zmid, zmax), labels = c(round(zmin, 2), round(zmid, 1), round(zmax, 1)))
	image(seq_len(ncol(plotMatrix)), seq_len(nrow(plotMatrix)), t(plotMatrix), xaxt = "n", yaxt = "n", col = ColorRamp, xlab = "", ylab = "")
	# axis(side=1,1:ncol(plotMatrix),labels=colnames(plot_data),cex.axis=1.2,las=2);box()
}
dev.off()


# SFig2E (first pass): for each selected cluster, read its GO table and draw a
# horizontal bar chart of -log10(column 3), labelled with column 2.
# NOTE(review): a later section opens "SFig2E.pdf" again (taking the p-value
# from column 5), which replaces the file written here -- confirm whether this
# pass is still needed.
pdf(file = "SFig2E.pdf", width = 10, height = 5)
for (each_i in seq_along(selected_cluster)) {
	each <- selected_cluster[each_i]
	file <- read.table(paste0("../data/", each, "_GO.txt"), sep = "\t")
	# Replace column 3 by its -log10, sort ascending on it, drop repeated rows.
	file[, 3] <- -log10(file[, 3])
	file <- unique(file[order(file[, 3]), ])
	# Wide left margin leaves room for the term labels.
	par(mar = c(8, 30, 8, 5))
	# barplot(file[,3],horiz=T,names=file[,2],las=2,border=F,col=clusterCol[each_i],space=0.5,axes=F)
	barplot(
		file[, 3],
		horiz = TRUE,
		names.arg = file[, 2],
		las = 2,
		border = FALSE,
		col = "grey",
		space = 0.5,
		axes = FALSE
	)
	axis(side = 1)
	mtext("-log10(p-value)", side = 1, line = 2)
}
dev.off()

# # gene expression of each cluster in development path
# pdf(file = "SFig2D_3.pdf", width = 5.4, height = 2);
# plot_data <- dData
# for (each_i in seq(length(selected_cluster))){
# 	each <- selected_cluster[each_i]
# 	genes <- intersect(names(which(km$cluster==each)),row.names(plot_data))
# 	plotMatrix <- plot_data[genes,]
# 	all_exp <- c(as.matrix(plotMatrix))
# 	zmax <- quantile(all_exp,0.99)
# 	zmin <- quantile(all_exp,0.01)
# 	ColorRamp <- colorRampPalette(c("lightblue","white","red"), bias=1)(10000)   #color list
# 	ColorLevels <- seq(to=zmax,from=zmin, length=10000)   #number sequencemodGenes <- names(which(km$cluster==each))
# 	plotMatrix[plotMatrix<zmin] <- zmin
# 	plotMatrix[plotMatrix>zmax] <- zmax
# 	par(oma=c(0.5,0.5,0.5,0.5),mar=c(8,2,2,2))
# 	layout(matrix(c(1,2,2,2,2,2,2,2,2),ncol=9,nrow=1,byrow=T))
# 	image(1,ColorLevels,t(matrix(data=ColorLevels, nrow=length(ColorLevels),ncol=1)),col=t(ColorRamp), xlab="",ylab="",cex.axis=2,xaxt="n",yaxt="n",useRaster=T)
# 	axis(side=2,c(zmin,round((zmax-zmin)/2,1),zmax),labels=c(round(zmin,2),round((zmax-zmin)/2,1),round(zmax,1)))
# 	image(1:ncol(plotMatrix), 1:nrow(plotMatrix), t(plotMatrix), xaxt="n", yaxt="n", col=ColorRamp, xlab="", ylab="")
# 	axis(side=1,1:ncol(plotMatrix),labels=colnames(plot_data),cex.axis=1.2,las=2);box()
# }
# dev.off()

# pdf(file = "SFig2D_4.pdf", width = 5.55, height = 3);
# plot_data <- dData
# for (each_i in seq(length(selected_cluster))){
# 	each <- selected_cluster[each_i]
# 	genes <- intersect(names(which(km$cluster==each)),row.names(plot_data))
# 	plot(apply(plot_data[genes,],2,mean),lwd=3,type="l",col=clusterCol[each_i],xlab=NA,ylab="Log2(FPKM+1)",xaxt="n")
# 	axis(side=1,1:ncol(plot_data),rep("",ncol(plot_data)),las=2);axis(side=2);box()
# }
# dev.off()

# SFig2E: for each selected cluster, read its GO table and draw a horizontal
# bar chart of -log10(column 5), labelled with column 2. Rows are sorted by
# ascending column 5 before plotting.
pdf(file = "SFig2E.pdf", width = 10, height = 4.5)
for (each_i in seq_along(selected_cluster)) {
	each <- selected_cluster[each_i]
	file <- read.table(paste0("../data/", each, "_GO.txt"), sep = "\t")
	# Column 3 is overwritten with the bar heights; column 5 stays untouched
	# and drives the row order. Repeated rows are removed afterwards.
	file[, 3] <- -log10(file[, 5])
	file <- unique(file[order(file[, 5]), ])
	# Wide left margin leaves room for the term labels.
	par(mar = c(8, 35, 8, 5))
	# barplot(file[,3],horiz=T,names=file[,2],las=2,border=F,col=clusterCol[each_i],space=0.5,axes=F)
	barplot(
		file[, 3],
		horiz = TRUE,
		names.arg = file[, 2],
		las = 2,
		border = FALSE,
		col = "grey",
		space = 0.5,
		axes = FALSE
	)
	axis(side = 1)
	mtext("-log10(p-value)", side = 1, line = 2)
}
dev.off()